{
 "cells": [
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": [
    "\n",
    "\n",
    "\n",
    " Pydantic is particularly useful for defining structured output schemas because it offers type hints and validation."
   ],
   "id": "73ac2e8d7149a806"
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": [
    "结构化输出的基本流程和基本原理（工具调用+输出格式验证）\n",
    "https://python.langchain.com/docs/concepts/structured_outputs/#structured-output-method\n",
    "\n",
    "如何使用pydantic或者其它比如TypedDict来实现结构化输出\n",
    "https://python.langchain.com/docs/how_to/structured_output/\n"
   ],
   "id": "25680dc00457308a"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "from typing import Optional, List\n",
    "\n",
    "from pydantic import BaseModel, Field\n",
    "\n",
    "\n",
    "class Person(BaseModel):\n",
    "    \"\"\"Information about a person.\"\"\"\n",
    "\n",
    "    # ^ Doc-string for the entity Person.\n",
    "    # This doc-string is sent to the LLM as the description of the schema Person,\n",
    "    # and it can help to improve extraction results.\n",
    "\n",
    "    # Note that:\n",
    "    # 1. Each field is an `optional` -- this allows the model to decline to extract it!\n",
    "    # 2. Each field has a `description` -- this description is used by the LLM.\n",
    "    # Having a good description can help improve extraction results.\n",
    "    name: Optional[str] = Field(default=None, description=\"The name of the person\")\n",
    "    hair_color: Optional[str] = Field(\n",
    "        default=None, description=\"The color of the person's hair if known\"\n",
    "    )\n",
    "    height_in_meters: Optional[str] = Field(\n",
    "        default=None, description=\"Height measured in meters\"\n",
    "    )\n",
    "    \n",
    "class Data(BaseModel):\n",
    "    \"\"\"Extracted data about people.\"\"\"\n",
    "\n",
    "    # Creates a model so that we can extract multiple entities.\n",
    "    people: List[Person]\n"
   ],
   "id": "562730f9a6d122a8"
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "另一种方式：使用typing_extentions中的Annotated，这样只用定义schema的功能，而不做结果格式验证",
   "id": "d63eccaa407ab34d"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "from typing import Optional\n",
    "\n",
    "from typing_extensions import Annotated, TypedDict\n",
    "\n",
    "\n",
    "# TypedDict\n",
    "class Joke(TypedDict):\n",
    "    \"\"\"Joke to tell user.\"\"\"\n",
    "\n",
    "    setup: Annotated[str, ..., \"The setup of the joke\"]\n",
    "\n",
    "    # Alternatively, we could have specified setup as:\n",
    "\n",
    "    # setup: str                    # no default, no description\n",
    "    # setup: Annotated[str, ...]    # no default, no description\n",
    "    # setup: Annotated[str, \"foo\"]  # default, no description\n",
    "\n",
    "    punchline: Annotated[str, ..., \"The punchline of the joke\"]\n",
    "    rating: Annotated[Optional[int], None, \"How funny the joke is, from 1 to 10\"]\n",
    "\n",
    "\n",
    "structured_llm = llm.with_structured_output(Joke)\n",
    "\n",
    "structured_llm.invoke(\"Tell me a joke about cats\")"
   ],
   "id": "1d489c3eb0bc3c0e"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "json_schema = {\n",
    "    \"title\": \"joke\",\n",
    "    \"description\": \"Joke to tell user.\",\n",
    "    \"type\": \"object\",\n",
    "    \"properties\": {\n",
    "        \"setup\": {\n",
    "            \"type\": \"string\",\n",
    "            \"description\": \"The setup of the joke\",\n",
    "        },\n",
    "        \"punchline\": {\n",
    "            \"type\": \"string\",\n",
    "            \"description\": \"The punchline to the joke\",\n",
    "        },\n",
    "        \"rating\": {\n",
    "            \"type\": \"integer\",\n",
    "            \"description\": \"How funny the joke is, from 1 to 10\",\n",
    "            \"default\": None,\n",
    "        },\n",
    "    },\n",
    "    \"required\": [\"setup\", \"punchline\"],\n",
    "}\n",
    "structured_llm = llm.with_structured_output(json_schema)\n",
    "\n",
    "structured_llm.invoke(\"Tell me a joke about cats\")"
   ],
   "id": "b613fa0c111f3b96"
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-08-01T02:07:59.121422Z",
     "start_time": "2025-08-01T02:07:59.117232Z"
    }
   },
   "cell_type": "code",
   "source": [
    "from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
    "\n",
    "# Define a custom prompt to provide instructions and any additional context.\n",
    "# 1) You can add examples into the prompt template to improve extraction quality\n",
    "# 2) Introduce additional parameters to take context into account (e.g., include metadata\n",
    "#    about the document from which the text was extracted.)\n",
    "prompt_template = ChatPromptTemplate.from_messages(\n",
    "    [\n",
    "        (\n",
    "            \"system\",\n",
    "            \"You are an expert extraction algorithm. \"\n",
    "            \"Only extract relevant information from the text. \"\n",
    "            \"If you do not know the value of an attribute asked to extract, \"\n",
    "            \"return null for the attribute's value.\",\n",
    "        ),\n",
    "        # Please see the how-to about improving performance with\n",
    "        # reference examples.\n",
    "        # MessagesPlaceholder('examples'),\n",
    "        (\"human\", \"{text}\"),\n",
    "    ]\n",
    ")"
   ],
   "id": "a0c380ebbdf361d1",
   "outputs": [],
   "execution_count": 20
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-08-01T02:07:59.141530Z",
     "start_time": "2025-08-01T02:07:59.137422Z"
    }
   },
   "cell_type": "code",
   "source": [
    "from dotenv import load_dotenv\n",
    "import os\n",
    "\n",
    "api_key = os.getenv(\"DeepSeek_API_KEY\")\n",
    "if not api_key:\n",
    "    # 加载当前目录的 .env 文件（文件内容类似：SECRET_KEY=xxx）\n",
    "    load_dotenv()\n",
    "    # 从加载的变量中获取值，用法同 os.getenv()\n",
    "    api_key = os.getenv(\"DeepSeek_API_KEY\")\n",
    "    os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
    "    os.environ[\"LANGSMITH_API_KEY\"] = os.getenv(\"LangSmith_API_KEY\")\n",
    "\n"
   ],
   "id": "edc18cbdd496ed52",
   "outputs": [],
   "execution_count": 21
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "from langchain_deepseek import ChatDeepSeek\n",
    "\n",
    "llm = ChatDeepSeek(\n",
    "    model=\"deepseek-chat\",\n",
    "    temperature=0,\n",
    "    max_tokens=None,\n",
    "    timeout=None,\n",
    "    max_retries=2,\n",
    "    api_key=api_key,\n",
    "    # other params...\n",
    ")"
   ],
   "id": "2669276050973972"
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": [
    "There are a few challenges when producing structured output with the above methods:\n",
    "\n",
    "When tool calling is used, tool call arguments needs to be parsed from a dictionary back to the original schema.\n",
    "\n",
    "In addition, the model needs to be instructed to always use the tool when we want to enforce structured output, which is a provider specific setting.\n",
    "\n",
    "When JSON mode is used, the output needs to be parsed into a JSON object."
   ],
   "id": "4d8ce9f021501e7d"
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "LangChain provides a method, with_structured_output(), that automates the process of binding the schema to the model and parsing the output. This helper function is available for all model providers that support structured output.",
   "id": "fd63b767c5c184bd"
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "With a schema defined, we need a way to instruct the model to use it. While one approach is to include this schema in the prompt and ask nicely for the model to use it, this is not recommended. Several more powerful methods that utilizes native features in the model provider's API are available.",
   "id": "222bc8b33a2afd31"
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "",
   "id": "ce75b9c0c936b60f"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "# Define schema\n",
    "schema = {\"foo\": \"bar\"}\n",
    "# Bind schema to model\n",
    "model_with_structure = model.with_structured_output(schema)\n",
    "# Invoke the model to produce structured output that matches the schema\n",
    "structured_output = model_with_structure.invoke(user_input)"
   ],
   "id": "89f8cd92eaacabf6"
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-08-01T02:08:01.306552Z",
     "start_time": "2025-08-01T02:08:01.298953Z"
    }
   },
   "cell_type": "code",
   "source": "structured_llm = llm.with_structured_output(schema=Person)",
   "id": "20d77c69985f9c96",
   "outputs": [],
   "execution_count": 23
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-08-01T02:08:06.443365Z",
     "start_time": "2025-08-01T02:08:01.327176Z"
    }
   },
   "cell_type": "code",
   "source": [
    "text = \"Alan Smith is 6 feet tall and has blond hair.\"\n",
    "prompt = prompt_template.invoke({\"text\": text})\n",
    "print(prompt.to_messages())\n",
    "structured_llm.invoke(prompt)"
   ],
   "id": "c3afef20fd83e2b6",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[SystemMessage(content=\"You are an expert extraction algorithm. Only extract relevant information from the text. If you do not know the value of an attribute asked to extract, return null for the attribute's value.\", additional_kwargs={}, response_metadata={}), HumanMessage(content='Alan Smith is 6 feet tall and has blond hair.', additional_kwargs={}, response_metadata={})]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "Person(name='Alan Smith', hair_color='blond', height_in_meters=None)"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 24
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-08-01T02:08:06.553689Z",
     "start_time": "2025-08-01T02:08:06.542072Z"
    }
   },
   "cell_type": "code",
   "source": [
    "from langchain_core.utils.function_calling import tool_example_to_messages\n",
    "\n",
    "\n",
    "examples = [\n",
    "    (\n",
    "        \"The ocean is vast and blue. It's more than 20,000 feet deep.\",\n",
    "        Data(people=[]),    # 在这里是工具调用\n",
    "    ),\n",
    "    (\n",
    "        \"Fiona traveled far from France to Spain.\",\n",
    "        Data(people=[Person(name=\"Fiona\", height_in_meters=None, hair_color=None)]),\n",
    "    ),\n",
    "]\n",
    "\n",
    "\n",
    "messages = []\n",
    "\n",
    "for txt, tool_call in examples:\n",
    "    if tool_call.people:\n",
    "        # This final message is optional for some providers\n",
    "        ai_response = \"Detected people.\"\n",
    "    else:\n",
    "        ai_response = \"Detected no people.\"\n",
    "    # tool_example_to_messages的参数是：用户输入文本，工具调用，答案\n",
    "    messages.extend(tool_example_to_messages(txt, [tool_call], ai_response=ai_response))"
   ],
   "id": "232345a2704a8348",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "from langchain_openai import ChatOpenAI\n",
    "model = ChatOpenAI(model=\"gpt-4o\", temperature=0)\n",
    "# Bind responseformatter schema as a tool to the model\n",
    "model_with_tools = model.bind_tools([ResponseFormatter])\n",
    "# Invoke the model\n",
    "ai_msg = model_with_tools.invoke(\"What is the powerhouse of the cell?\")"
   ],
   "id": "4d859fe1ed24a61e"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "# Get the tool call arguments\n",
    "ai_msg.tool_calls[0][\"args\"]\n",
    "{'answer': \"The powerhouse of the cell is the mitochondrion. Mitochondria are organelles that generate most of the cell's supply of adenosine triphosphate (ATP), which is used as a source of chemical energy.\",\n",
    " 'followup_question': 'What is the function of ATP in the cell?'}\n",
    "# Parse the dictionary into a pydantic object\n",
    "pydantic_object = ResponseFormatter.model_validate(ai_msg.tool_calls[0][\"args\"])"
   ],
   "id": "7f8f66b2dab08747"
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": [
    "\n",
    "# Choosing between multiple schemas\n",
    "The simplest way to let the model choose from multiple schemas is to create a parent schema that has a Union-typed attribute."
   ],
   "id": "48feee96c0049002"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "from typing import Union\n",
    "\n",
    "\n",
    "class Joke(BaseModel):\n",
    "    \"\"\"Joke to tell user.\"\"\"\n",
    "\n",
    "    setup: str = Field(description=\"The setup of the joke\")\n",
    "    punchline: str = Field(description=\"The punchline to the joke\")\n",
    "    rating: Optional[int] = Field(\n",
    "        default=None, description=\"How funny the joke is, from 1 to 10\"\n",
    "    )\n",
    "\n",
    "\n",
    "class ConversationalResponse(BaseModel):\n",
    "    \"\"\"Respond in a conversational manner. Be kind and helpful.\"\"\"\n",
    "\n",
    "    response: str = Field(description=\"A conversational response to the user's query\")\n",
    "\n",
    "\n",
    "class FinalResponse(BaseModel):\n",
    "    final_output: Union[Joke, ConversationalResponse]\n",
    "\n",
    "\n",
    "structured_llm = llm.with_structured_output(FinalResponse)\n",
    "\n",
    "structured_llm.invoke(\"Tell me a joke about cats\")"
   ],
   "id": "d41e0a84c7b1ca29"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "from langchain_core.prompts import ChatPromptTemplate\n",
    "\n",
    "system = \"\"\"You are a hilarious comedian. Your specialty is knock-knock jokes. \\\n",
    "Return a joke which has the setup (the response to \"Who's there?\") and the final punchline (the response to \"<setup> who?\").\n",
    "\n",
    "Here are some examples of jokes:\n",
    "\n",
    "example_user: Tell me a joke about planes\n",
    "example_assistant: {{\"setup\": \"Why don't planes ever get tired?\", \"punchline\": \"Because they have rest wings!\", \"rating\": 2}}\n",
    "\n",
    "example_user: Tell me another joke about planes\n",
    "example_assistant: {{\"setup\": \"Cargo\", \"punchline\": \"Cargo 'vroom vroom', but planes go 'zoom zoom'!\", \"rating\": 10}}\n",
    "\n",
    "example_user: Now about caterpillars\n",
    "example_assistant: {{\"setup\": \"Caterpillar\", \"punchline\": \"Caterpillar really slow, but watch me turn into a butterfly and steal the show!\", \"rating\": 5}}\"\"\"\n",
    "\n",
    "prompt = ChatPromptTemplate.from_messages([(\"system\", system), (\"human\", \"{input}\")])\n",
    "\n",
    "few_shot_structured_llm = prompt | structured_llm\n",
    "few_shot_structured_llm.invoke(\"what's something funny about woodpeckers\")"
   ],
   "id": "72a717df52b2bec6"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "from langchain_core.messages import AIMessage, HumanMessage, ToolMessage\n",
    "\n",
    "examples = [\n",
    "    HumanMessage(\"Tell me a joke about planes\", name=\"example_user\"),\n",
    "    AIMessage(\n",
    "        \"\",\n",
    "        name=\"example_assistant\",\n",
    "        tool_calls=[\n",
    "            {\n",
    "                \"name\": \"joke\",\n",
    "                \"args\": {\n",
    "                    \"setup\": \"Why don't planes ever get tired?\",\n",
    "                    \"punchline\": \"Because they have rest wings!\",\n",
    "                    \"rating\": 2,\n",
    "                },\n",
    "                \"id\": \"1\",\n",
    "            }\n",
    "        ],\n",
    "    ),\n",
    "    # Most tool-calling models expect a ToolMessage(s) to follow an AIMessage with tool calls.\n",
    "    ToolMessage(\"\", tool_call_id=\"1\"),\n",
    "    # Some models also expect an AIMessage to follow any ToolMessages,\n",
    "    # so you may need to add an AIMessage here.\n",
    "    HumanMessage(\"Tell me another joke about planes\", name=\"example_user\"),\n",
    "    AIMessage(\n",
    "        \"\",\n",
    "        name=\"example_assistant\",\n",
    "        tool_calls=[\n",
    "            {\n",
    "                \"name\": \"joke\",\n",
    "                \"args\": {\n",
    "                    \"setup\": \"Cargo\",\n",
    "                    \"punchline\": \"Cargo 'vroom vroom', but planes go 'zoom zoom'!\",\n",
    "                    \"rating\": 10,\n",
    "                },\n",
    "                \"id\": \"2\",\n",
    "            }\n",
    "        ],\n",
    "    ),\n",
    "    ToolMessage(\"\", tool_call_id=\"2\"),\n",
    "    HumanMessage(\"Now about caterpillars\", name=\"example_user\"),\n",
    "    AIMessage(\n",
    "        \"\",\n",
    "        tool_calls=[\n",
    "            {\n",
    "                \"name\": \"joke\",\n",
    "                \"args\": {\n",
    "                    \"setup\": \"Caterpillar\",\n",
    "                    \"punchline\": \"Caterpillar really slow, but watch me turn into a butterfly and steal the show!\",\n",
    "                    \"rating\": 5,\n",
    "                },\n",
    "                \"id\": \"3\",\n",
    "            }\n",
    "        ],\n",
    "    ),\n",
    "    ToolMessage(\"\", tool_call_id=\"3\"),\n",
    "]\n",
    "system = \"\"\"You are a hilarious comedian. Your specialty is knock-knock jokes. \\\n",
    "Return a joke which has the setup (the response to \"Who's there?\") \\\n",
    "and the final punchline (the response to \"<setup> who?\").\"\"\"\n",
    "\n",
    "prompt = ChatPromptTemplate.from_messages(\n",
    "    [(\"system\", system), (\"placeholder\", \"{examples}\"), (\"human\", \"{input}\")]\n",
    ")\n",
    "few_shot_structured_llm = prompt | structured_llm\n",
    "few_shot_structured_llm.invoke({\"input\": \"crocodiles\", \"examples\": examples})"
   ],
   "id": "ce0535f2673d9833"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "from langchain_core.prompts.few_shot import FewShotPromptTemplate\n",
    "from langchain_core.prompts.prompt import PromptTemplate\n",
    "\n",
    "example_prompt = PromptTemplate.from_template(\"Input: {input} -> Output: {output}\")\n",
    "prompt = FewShotPromptTemplate(\n",
    "    example_selector=example_selector,\n",
    "    example_prompt=example_prompt,\n",
    "    suffix=\"Input: {input} -> Output:\",\n",
    "    prefix=\"Translate the following words from English to Italian:\",\n",
    "    input_variables=[\"input\"],\n",
    ")\n",
    "\n",
    "print(prompt.format(input=\"word\"))"
   ],
   "id": "50f26a51e8af9fb6"
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
